module dataframe.typed;
import dataframe.dataframe;
import std.conv;
import std.csv;
import std.datetime;
import std.exception;
import std.range:array, stride,only;
import std.stdio;
import std.variant;
import std.string:isNumeric;
import std.typecons:tuple,Tuple;
import std.traits;
import std.file;

// Inside `with(ColumnType)` the bare names `Date` and `DateTime` resolve to the
// enum members, not to the std.datetime types, so the types get unambiguous aliases.
private alias StdDate=std.datetime.Date;
private alias StdDateTime=std.datetime.DateTime;

/**
	Converts a single cell value to the storage type of a column.

	The conversion is selected at compile time from the (Target, T) pair, so that a
	runtime switch over ColumnType can mention every storage type without forcing
	every conversion to compile for every T.

	Throws: Exception when no conversion from T to Target is available; whatever
	std.conv.to (or the string helpers) throw when the value itself is malformed.
*/
private Target convertCell(Target,T)(T value)
{
	static if (is(Unqual!T==Target))
		return value;
	else static if (is(Target==StdDate) && is(Unqual!T==StdDateTime))
		return value.date;
	else static if (is(Target==StdDateTime) && is(Unqual!T==StdDate))
		return StdDateTime(value);
	else static if (is(Target==StdDate) && is(Unqual!T==string))
		// NOTE(review): stringToDate is not defined in this file; presumably it is
		// provided by dataframe.dataframe - confirm.
		return value.stringToDate;
	else static if (is(Target==StdDateTime) && is(Unqual!T==string))
		// NOTE(review): stringToDateTime is not defined in this file; presumably it is
		// provided by dataframe.dataframe - confirm.
		return value.stringToDateTime;
	else static if (!is(Target==StdDate) && !is(Target==StdDateTime) && is(typeof(value.to!Target)))
		return value.to!Target;
	else
		throw new Exception("cannot convert "~T.stringof~" to "~Target.stringof);
}

/**
	Converts a whole column. When the element type already matches, the input
	slice itself is returned (no copy); otherwise a new array is built with
	convertCell.
*/
private Target[] convertColumn(Target,S)(S[] vals)
{
	static if (is(S==Target))
		return vals;
	else
	{
		auto converted=new Target[](vals.length);
		foreach(i,v;vals)
			converted[i]=convertCell!Target(v);
		return converted;
	}
}

/**
	Sets the length of the array stored under `col`, creating a default-initialised
	array when the column has no storage yet.
*/
private void resizeColumn(T)(ref T[][string] store,string col,size_t rows)
{
	if (auto existing=col in store)
		(*existing).length=rows;
	else
		store[col]=new T[](rows);
}

/**
	Builds a DataFrameTyped from a CSV file.

	The file is first loaded into an untyped DataFrame, whose findColumnTypes result
	supplies the column types. When `titles` is empty the column titles are taken
	from that frame (index title first, then its column titles); otherwise `titles`
	is used as given. The file is then loaded a second time into the typed frame.

	Params:
		filename = path of the CSV file
		titles   = column titles to use, or an empty array to take them from the file
*/
DataFrameTyped typedFrameFromCSV(string filename, string[] titles)
{
	immutable bool titlesFromFile=(titles.length==0);

	DataFrame frame;
	frame=frame.loadCSVFile(filename,!titlesFromFile);

	auto newTitles=titlesFromFile ? frame.indexTitle~frame.columnTitles : titles;

	DataFrameTyped typedFrame;
	typedFrame=typedFrame.setColumnTitles(newTitles)
			.setColumnTypes(frame.findColumnTypes)
			.loadCSVFile(filename,newTitles,titlesFromFile);
	return typedFrame;
}

/**
	A view of one row of a DataFrameTyped, giving access to cells by column name
	through opDispatch (`row.price`, `row.price(1.5)`).
*/
struct DataFrameTypedRow
{
	DataFrameTyped *frame;
	size_t rowNumber;

/*	auto opIndexAssign(T)(T value, string colName)
	{
		import std.algorithm:canFind;
		enforce(frame.columnTitles.canFind(colName));
		(*frame)[rowNumber,colName]=value;
		return this;
	}
*/
	/**
		Reads the cell of this row in column `colName`.

		NOTE(review): this instantiates loadCell!KalVariant; neither KalVariant nor a
		matching loadCell overload is defined in this file - confirm they come from
		dataframe.dataframe.

		Throws: Exception when the frame has no column named `colName`.
	*/
	auto opDispatch(string colName)()
	{
		import std.algorithm:canFind;
		enforce(frame.columnTitles.canFind(colName),"no such column: "~colName);
		return (*frame).loadCell!KalVariant(rowNumber,colName);
	}

	/**
		Writes `value` into the cell of this row in column `colName`.

		Throws: Exception when the frame has no column named `colName`.
	*/
	void opDispatch(string colName,T)(T value)
	{
		import std.algorithm:canFind;
		enforce(frame.columnTitles.canFind(colName),"no such column: "~colName);
		(*frame)[rowNumber,colName]=value;
	}
/*	T loadCell(T)(string series)
	{
		return (*frame).loadCell!T(rowNumber,series);
	}*/
}


/**
	A column-oriented data frame. Each column has a title and a ColumnType, and its
	cells live in the array of the matching element type inside `values`, keyed by
	the column title.

	Most setters return the frame by value so that calls can be chained; the caller
	must keep the returned frame.
*/
struct DataFrameTyped
{
	string title;
	string[] columnTitles;
	ColumnType[string] columnTypes;

	char separator=',';
	char quote='\"';
	size_t numRows;
	struct Values
	{
		double[][string] doubles;
		int[][string] ints;
		long[][string] longs;
		std.datetime.Date[][string] dates;
		std.datetime.DateTime[][string] dateTimes;
		string[][string] strings;
	}
	Values values;
	size_t[string] stringSizes;


	/// Resizes every column to `rows` rows and returns the frame.
	auto setRows(size_t rows)
	{
		this.length=rows;
		return this;
	}

	/**
		Adds a new column holding `vals`.

		The values are converted to the storage type selected by `type`; when T
		already is that type the slice is stored without copying. The first column
		inserted into a frame without columns determines the row count.

		Throws: Exception when a column called `title` already exists, when
		`vals.length` differs from the row count of a frame that already has columns,
		or when T cannot be converted to the storage type of `type`.
	*/
	auto insertColumn(T)(string title,ColumnType type,T[] vals)
	{
		enforce((title in columnTypes) is null,"column already exists: "~title);
		enforce(columnTitles.length==0 || vals.length==numRows,
			"column "~title~" does not have the same number of rows as the frame");

		// convert first, so that a failed conversion leaves the frame untouched
		final switch(type) with(ColumnType)
		{
			case Double:
				values.doubles[title]=convertColumn!double(vals);
				break;
			case Int:
				values.ints[title]=convertColumn!int(vals);
				break;
			case Long:
				values.longs[title]=convertColumn!long(vals);
				break;
			case Date:
				values.dates[title]=convertColumn!StdDate(vals);
				break;
			case DateTime:
				values.dateTimes[title]=convertColumn!StdDateTime(vals);
				break;
			case String:
				values.strings[title]=convertColumn!string(vals);
				break;
		}
		this.columnTitles~=title;
		this.columnTypes[title]=type;
		this.numRows=vals.length;
		return this;
	}

	/**
		Removes the column called `title` together with its stored values.

		Throws: Exception when there is no such column.
	*/
	auto deleteColumn(string title)
	{
		import std.algorithm:countUntil;
		auto i=columnTitles.countUntil(title);
		enforce(i>=0,"no such column: "~title);
		final switch(columnTypes[title]) with(ColumnType)
		{
			case Double:
				values.doubles.remove(title);
				break;
			case Int:
				values.ints.remove(title);
				break;
			case Long:
				values.longs.remove(title);
				break;
			case Date:
				values.dates.remove(title);
				break;
			case DateTime:
				values.dateTimes.remove(title);
				break;
			case String:
				values.strings.remove(title);
				break;
		}
		columnTypes.remove(title);
		// one expression covers first, middle and last position: the slices on
		// either side of i are simply empty at the ends
		columnTitles=columnTitles[0..i]~columnTitles[i+1..$];
		return this;
	}

	/**
		Appends cell (`series`, `rowNum`) of `frame` to column `series` of this frame.
		The storage type is chosen from `frame.columnTypes`. numRows is not updated.
	*/
	void mergeCell(DataFrameTyped frame, string series, size_t rowNum)
	{
		final switch(frame.columnTypes[series]) with(ColumnType)
		{
			case Double:
				values.doubles[series]~=frame.values.doubles[series][rowNum];
				break;
			case Int:
				values.ints[series]~=frame.values.ints[series][rowNum];
				break;
			case Long:
				values.longs[series]~=frame.values.longs[series][rowNum];
				break;
			case Date:
				values.dates[series]~=frame.values.dates[series][rowNum];
				break;
			case DateTime:
				values.dateTimes[series]~=frame.values.dateTimes[series][rowNum];
				break;
			case String:
				values.strings[series]~=frame.values.strings[series][rowNum];
				break;
		}
	}

	/**
		Appends `value`, converted to the column's storage type, to column `series`.
		numRows is not updated.

		Throws: Exception when T cannot be converted to the column's storage type.
	*/
	void appendCell(T)(string series, T value)
	{
		final switch(columnTypes[series]) with(ColumnType)
		{
			case Double:
				values.doubles[series]~=convertCell!double(value);
				break;
			case Int:
				values.ints[series]~=convertCell!int(value);
				break;
			case Long:
				values.longs[series]~=convertCell!long(value);
				break;
			case Date:
				values.dates[series]~=convertCell!StdDate(value);
				break;
			case DateTime:
				values.dateTimes[series]~=convertCell!StdDateTime(value);
				break;
			case String:
				values.strings[series]~=convertCell!string(value);
				break;
		}
	}

	/**
		Reads a cell as a numeric type.

		Throws: Exception when the column holds dates, date-times or strings.
	*/
	T loadCell(T)(size_t row, string series)
	if(std.traits.isNumeric!T)
	{
		final switch(columnTypes[series]) with(ColumnType)
		{
			case Double:
				return values.doubles[series][row].to!T;
			case Int:
				return values.ints[series][row].to!T;
			case Long:
				return values.longs[series][row].to!T;
			case Date,DateTime:
				throw new Exception("cannot convert date/datetime to numeric type "~T.stringof);
			case String:
				throw new Exception("cannot convert string to numeric type "~T.stringof);
		}
		assert(0);
		//return (loadCell!(T[])(series,row,row+1))[0];
	}

	/**
		Reads a cell as a Date or DateTime.

		Date and DateTime columns are converted to each other as needed; string
		columns are parsed with SysTime.fromSimpleString.

		Throws: Exception when the column is numeric.
	*/
	T loadCell(T)(size_t row, string series)
	if(is(T==DateTime) || is(T==Date))
	{
		final switch(columnTypes[series]) with(ColumnType)
		{
			case Double,Int,Long:
				throw new Exception("cannot convert number to date type");
			case Date:
				static if (is(T==StdDate))
					return values.dates[series][row];
				else
					return StdDateTime(values.dates[series][row]);
			case DateTime:
				static if (is(T==StdDateTime))
					return values.dateTimes[series][row];
				else
					return values.dateTimes[series][row].date;
			case String:
				// SysTime casts to both Date and DateTime
				return cast(T)SysTime.fromSimpleString(values.strings[series][row]);
		}
		assert(0);
		//return (loadCell!(T[])(series,row,row+1))[0];
	}

	/// Reads a cell as a string; non-string columns are converted with std.conv.to.
	T loadCell(T)(size_t row, string series)
	if(is(T==string))
	{
		final switch(columnTypes[series]) with(ColumnType)
		{
			case Double:
				return values.doubles[series][row].to!T;
			case Int:
				return values.ints[series][row].to!T;
			case Long:
				return values.longs[series][row].to!T;
			case Date:
				return values.dates[series][row].to!T;
			case DateTime:
				return values.dateTimes[series][row].to!T;
			case String:
				return values.strings[series][row];
		}
		assert(0);
		//return (loadCell!(T[])(series,row,row+1))[0];
	}
/*
	T loadCell(T)(string series, size_t start, size_t end)
	{
		final switch(columnTypes[series]) with(ColumnType)
		{
			case Double:
				return values.doubles[series][start..end].to!T;
			case Int:
				return values.ints[series][start..end].to!T;
			case Long:
				return values.longs[series][start..end].to!T;
			case Date:
				return values.dates[series][start..end].to!T;
			case DateTime:
				return values.dateTimes[series][start..end].to!T;
			case String:
				return values.strings[series][start..end].to!T;
		}
	}
*/
	/// Sets the frame title and returns the frame.
	DataFrameTyped setTitle(string title)
	{
		this.title=title;
		return this;
	}

	/// Replaces the list of column titles and returns the frame.
	DataFrameTyped setColumnTitles(string[] titles)
	{
		this.columnTitles=titles;
		return this;
	}

	/**
		Assigns `columnTypes[i]` to the i-th column title. Column titles must have
		been set beforehand.

		Throws: Exception when fewer types than titles are supplied, or when the
		number of distinct typed columns afterwards differs from the number of titles.
	*/
	DataFrameTyped setColumnTypes(ColumnType[] columnTypes)
	{
		enforce(columnTypes.length>=columnTitles.length,"fewer column types than column titles");
		foreach(i,title;columnTitles)
			this.columnTypes[title]=columnTypes[i];
		enforce(this.columnTitles.length==this.columnTypes.length);
		return this;
	}

	/// Number of rows.
	size_t length() @property
	{
		return this.numRows;
	}

	/**
		Sets the number of rows, resizing the storage of every column listed in
		columnTitles. Does nothing when the row count is unchanged.
	*/
	void length(size_t rows) @property
	{
		if (rows==this.numRows)
			return;
		foreach(col;columnTitles)
		{
			final switch(columnTypes[col]) with(ColumnType)
			{
				case Double:
					resizeColumn(this.values.doubles,col,rows);
					break;
				case Int:
					resizeColumn(this.values.ints,col,rows);
					break;
				case Long:
					resizeColumn(this.values.longs,col,rows);
					break;
				case Date:
					resizeColumn(this.values.dates,col,rows);
					break;
				case DateTime:
					resizeColumn(this.values.dateTimes,col,rows);
					break;
				case String:
					resizeColumn(this.values.strings,col,rows);
					break;
			}
		}
		this.numRows=rows;
	}

	/**
		Resizes the frame to `indexValues.length` rows and stores the values in the
		first column.

		NOTE(review): the previous body referenced members this struct does not
		declare (indexType, indexValues, numDoubleCols, values.datetimes, ...). The
		first column is treated as the index here because typedFrameFromCSV puts the
		source frame's index title first - confirm this is the intended meaning.

		Throws: Exception when the frame has no columns, or when T cannot be converted
		to the storage type of the first column.
	*/
	DataFrameTyped setIndexValues(T)(T[] indexValues)
	{
		enforce(columnTitles.length>0,"cannot set index values on a frame without columns");
		this.length=indexValues.length;
		foreach(i,value;indexValues)
			this[i,columnTitles[0]]=value;
		return this;
	}

/*	DataFrameTyped setCellValues(KalVariant[][] cellValues)
	{
		foreach(i,row;cellValues)
		{
			foreach(j,cell;row)
			{
				this[i,j+1]=cell;
			}
		}
		return this;
	}
	DataFrameTyped setAllValues(KalVariant[][] values)
	{
		foreach(i,row;values)
		{
			this.indexValues[i]=values[i][0];
			foreach(j,cell;row[1..$])
			{
				this[i,j+1]=cell;
			}
		}
		return this;
	}
*/
	/**
		Reads the file `csv` and passes its contents to loadCSV.

		NOTE(review): loadCSV is not defined in this file - confirm where it comes from.
	*/
	DataFrameTyped loadCSVFile(string csv, string[] columnTitles=[],bool skipFirst=false)
	{
		auto file=std.file.read(csv);
		return loadCSV(cast(string) file,columnTitles,skipFirst);
	}

	/// Sets the field separator character and returns the frame.
	DataFrameTyped setSeparator(char separator)
	{
		this.separator=separator;
		return this;
	}

	/// Sets the quote character and returns the frame.
	DataFrameTyped setQuote(char separator)
	{
		this.quote=separator;
		return this;
	}

	/**
		Appends all rows of `frame` to this frame.

		Throws: Exception when the titles, column titles or column types of the two
		frames are incompatible.
	*/
	DataFrameTyped mergeFrames(DataFrameTyped frame)
	{
		enforce((frame.title=="") || (frame.title==this.title) || (this.title==""),
			"cannot merge frames with different titles");
		enforce(frame.columnTitles.length==0 || frame.columnTitles==this.columnTitles || this.columnTitles.length==0,
			"cannot merge frames with different column titles");
		enforce(frame.columnTypes==this.columnTypes,
			"cannot merge frames with different column types");
		foreach(colTitle;frame.columnTitles)
		{
			foreach(rowNum;0..frame.numRows)
				mergeCell(frame,colTitle,rowNum);
		}
		this.numRows+=frame.numRows;
		// should do sort and uniq
		return this;
	}

	/// Number of columns that have a type assigned.
	size_t numCols()
	{
		return columnTypes.length;
	}

	/// Returns a row view that points at this frame.
	auto opIndex(size_t row)
	{
		return DataFrameTypedRow(&this,row);
	}
/*
	auto opIndexAssign(DataFrameTypedRow rowData,size_t rowNumber)
	{
		foreach(j,col;cols)
		{
			this[rowNumber,columnTitles[j]]=rowData.frame[]
		}
*/
	/// Reads cell (`row`, `col`) as T; `col` is a position in columnTitles.
	T opIndex(T)(size_t row, size_t col)
	{
		//enforce((row>=0) && (col>=0) && (col <=numCols) &&(row<=indexValues.length));
		return loadCell!T(row,columnTitles[col]);
	}

	/// Reads cell (`row`, `col`) as T; `col` is a column title.
	T opIndex(T)(size_t row, string col)
	{
		return loadCell!T(row,col);
	}

	/**
		Reads the cells selected by `rows` x `cols` as T.

		Returns: a rows.length by cols.length table, `ret[i][j]` being the cell at
		(rows[i], cols[j]).
	*/
	T[][] opIndex(T)(size_t[] rows, size_t[] cols)
	{
		auto ret=new T[][](rows.length,cols.length);
		foreach(i,row;rows)
		{
			foreach(j,col;cols)
				ret[i][j]=loadCell!T(row,columnTitles[col]);
		}
		return ret;
	}

	/// Writes `value` into cell (`row`, `col`); `col` is a position in columnTitles.
	T opIndexAssign(T)(T value, size_t row, size_t col)
	{
		return opIndexAssign!T(value,row,this.columnTitles[col]);
	}

	/**
		Writes `value`, converted to the column's storage type, into cell
		(`row`, `col`).

		Returns: `value` unchanged.
		Throws: Exception when T cannot be converted to the column's storage type.
	*/
	T opIndexAssign(T)(T value, size_t row, string col)
	{
		// enforce type safety for columns
		//enforce((row>=0) && (col>=0) && (col <=numCols) &&(row<=indexValues.length));
		final switch(columnTypes[col]) with(ColumnType)
		{
			case Double:
				this.values.doubles[col][row]=convertCell!double(value);
				break;
			case Int:
				this.values.ints[col][row]=convertCell!int(value);
				break;
			case Long:
				this.values.longs[col][row]=convertCell!long(value);
				break;
			case String:
				this.values.strings[col][row]=convertCell!string(value);
				break;
			case Date:
				this.values.dates[col][row]=convertCell!StdDate(value);
				break;
			case DateTime:
				this.values.dateTimes[col][row]=convertCell!StdDateTime(value);
				break;
		}
		return value;
	}

	/**
		Returns the values of column `col` as T[].

		When T is the column's storage type the stored array itself is returned;
		otherwise a converted copy is returned.

		Throws: Exception when the stored type cannot be converted to T.
	*/
	T[] columnValues(T)(string col)
	{
		final switch(columnTypes[col]) with(ColumnType)
		{
			case Double:
				return convertColumn!T(this.values.doubles[col]);
			case Int:
				return convertColumn!T(this.values.ints[col]);
			case Long:
				return convertColumn!T(this.values.longs[col]);
			case String:
				return convertColumn!T(this.values.strings[col]);
			case Date:
				return convertColumn!T(this.values.dates[col]);
			case DateTime:
				return convertColumn!T(this.values.dateTimes[col]);
		}
	}

	/// Same as the overload taking a title; `col` is a position in columnTitles.
	T[] columnValues(T)(size_t col)
	{
		return this.columnValues!T(this.columnTitles[col]);
	}

	/// Returns the column types in the order of columnTitles.
	ColumnType[] findColumnTypes()
	{
		ColumnType[] ret;
		foreach(title;this.columnTitles)
			ret~=this.columnTypes[title];
		return ret;
	}

	/// Expands the slice `start..end` on dimension `i` into the list of indices it covers.
	size_t[] opSlice(size_t i)(size_t start, size_t end)
		if ((i==0)||(i==1))
	{
		import std.range:iota;
		return iota(start,end).array;
	}

	/// `$` is the row count in dimension 0 and the column count in dimension 1.
	size_t opDollar(size_t i)()
	{
		static if (i==0)
			return numRows;
		else static if(i==1)
			return numCols;
		else static assert(0);
	}

	/**
		Renders the frame as tab-separated text: a heading line, a line of column
		titles (each preceded by a tab), then one line per row (each cell followed by
		a tab).
	*/
	string toString()
	{
		import std.array:appender;
		auto text=appender!string();
		text.put("Kaleidic Typed Dataframe: ");
		text.put(this.title);
		text.put("\n\n");

		foreach(colTitle;columnTitles)
		{
			text.put("\t");
			text.put(colTitle);
		}
		text.put("\n");

		foreach(row;0..numRows)
		{
			foreach(colTitle;columnTitles)
			{
				text.put(loadCell!string(row,colTitle));
				text.put("\t");
			}
			text.put("\n");
		}
		return text.data;
	}

}